*Sidak Yntiso
*Nov 5 2020
****************************
*1. Clean Columns
*2. Add guidelines
*3. Aggregate to case level
****************************
* Load the raw extract and drop two columns that are not used below.
use "WA_rawfile_nov20.dta", clear
drop error All_NA



*******************************************************
********************1. Clean Columns*******************
*******************************************************
* Rebuild `date' as MM/DD/YYYY.
* The raw string is split on "‐" into date1, date2 and date3; date2 holds a
* three-letter month abbreviation and date3 a two-digit year. date1 is used
* as the day when the string is reassembled for the "MDY" mask.
split date, p("‐")

* Month abbreviation -> zero-padded month number ("Jan" -> "01", ...).
local m = 0
foreach mon in Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec {
	local ++m
	replace date2 = string(`m', "%02.0f") if date2 == "`mon'"
}

* Expand two-digit years with a pivot at 40: 40-99 -> 19xx, 0-39 -> 20xx.
* The previous conditions (>40 / <39) left 39 and 40 unexpanded, which made
* date() return missing for those rows. The "< 100" guard keeps a year that
* is already four digits (and missing values) from being shifted.
destring date3, replace
replace date3 = date3 + 1900 if date3 >= 40 & date3 < 100
replace date3 = date3 + 2000 if date3 < 40
tostring date3, replace
replace date = date2 + "/" + date1 + "/" + date3

* Stata daily date and calendar year; keep sentences from 2015 onward.
* NOTE(review): rows whose date fails to parse have year == . and are NOT
* dropped here, because missing is not "< 2015" - confirm that is intended.
gen edate = date(date, "MDY")
gen year = year(edate)
drop if year < 2015

* NOTE: an invalid command ("codebreak here please") used as a manual stop
* point stood here. It aborted every non-interactive run of this do-file, so
* it has been removed. Insert `exit' here if you need to stop after the date
* cleaning.

***************************************************************
* County clean-up
* Two county names show up with "X" where an "A" belongs; restore them.
* The first is detected from the raw line text, the second from county itself.
replace county = "WAHKIAKUM" if strpos(cleaned_contents, "WXHKIAKUM") > 0
replace county = "WALLA WALLA" if county == "WXLLA WXLLA"
* Rows without a county are removed (author's note: sentenced outside WA).
drop if missing(county)

***************************************************************
* Judge clean-up, step 1: strip non-name text from the judge field.
* (Author's note: this is tuned for judges who sentence after 2015.)
* The fragments are removed strictly in the order listed, because several
* are substrings of others. Patterns whose leading or repeated blanks matter
* are written out one per line rather than passed through a macro.
foreach junk in "CONFINEMENT OVER ONE YEAR" "CONFINEMENT UNDER ONE YEAR" ///
	"PROBATION" "DOSA (DOSA" "PRISON ‐ ISRB" "COMMUNITY CUSTODY BOARD" ///
	"MISDEMEANOR" "DOSA PRISON (DOSA 3 & 4)" "DOSA RESIDENTIAL (DOSA 3)" ///
	"DOSA RESIDENTIAL (DOSA" "FIRST TIME OFFENDER" ///
	"SPECIAL SEX OFFENDER(SSOSA)" "SEXUAL MOTIVATION" "3 & 4)" ///
	"SUPERVISED APPEAL" {
	replace judge = subinstr(judge, "`junk'", "", .)
}
replace judge = subinstr(judge,"          CC","",.)
replace judge = subinstr(judge,"  CC","",.)
foreach junk in "ATTEMPT" ".." "INSANITY ACQUITTAL" "‐ Pro Tem" "LIFE" ///
	"DEADLY WEAPON" "FIREARM" "DOSA PRISON (DOSA" ///
	"ASSAULT OF LAW ENFORCEMENT PERSONNEL WITH A FI" {
	replace judge = subinstr(judge, "`junk'", "", .)
}
replace judge = subinstr(judge,"AN              VUCSA PROTECTED ZONE","",.)
replace judge = subinstr(judge,"E.    FOSA","E. FOSA",.)
* Finally remove every remaining "‐" character.
replace judge = subinstr(judge,"‐","",.)

* Blank out leftovers that cannot be a name: a bare "CC" and any value of
* one or two characters (length is measured before trimming).
replace judge = "" if judge == "CC"
gen judge_len = strlen(judge)
replace judge = "" if inlist(judge_len, 1, 2)

* Upper-case, trim, then blank the placeholder values.
replace judge = trim(upper(judge))
replace judge = "" if inlist(judge, "CC", "UNKNOWN")

* Judge clean-up, step 2: collapse spelling variants onto one label per judge.
* Rules run top to bottom and later rules see the output of earlier ones, so
* the order must not change. Rules without a county condition apply statewide.
* The `tab' calls only list the current spellings for inspection.

* --- ADAMS ---
tab judge if county == "ADAMS"
replace judge = "MILLER, RICHARD W" if judge == "MILLER, R" & county == "ADAMS"

* --- BENTON ---
tab judge if county == "BENTON"
replace judge = "YULE, DENNIS D" if judge == "YULE, D" & county == "BENTON"
replace judge = "YENCOPAL, ALBERT J" if strpos(judge, "YENCOPAL") > 0 & county == "BENTON"
replace judge = "VANDOORNINCK, KITTY-ANN" if strpos(judge, "VANDOORNINCK") > 0
replace judge = "VANDERSCHOOR, VIC L" if strpos(judge, "VANDERSCHOOR") > 0
replace judge = "TABER, D" if strpos(judge, "TABER") > 0 & county == "BENTON"
replace judge = "STAPLES, FRED" if strpos(judge, "STAPLES") > 0 & county == "BENTON"
replace judge = "PATRICK, R" if strpos(judge, "PATRICK") > 0 & county == "BENTON"
replace judge = "EKSTROM, ALEXANDER C." if strpos(judge, "EKSTROM") > 0
replace judge = "BROWN, CAROLYN" if judge == "BROWN, C" & county == "BENTON"
replace judge = "MATHESON, CRAIG J." if strpos(judge, "MATHESON") > 0 & county == "BENTON"

* --- CHELAN ---
tab judge if county == "CHELAN"
replace judge = "ALLAN, LESLEY A" if strpos(judge, "ALLAN") > 0 & county == "CHELAN"
replace judge = "BRIDGES, JOHN E" if strpos(judge, "BRIDGES") > 0 & county == "CHELAN"
replace judge = "CONE, CHARLES" if strpos(judge, "CONE") > 0 & county == "CHELAN"
replace judge = "VAN SICKLE, F" if strpos(judge, "SICKLE") > 0 & county == "CHELAN"


* --- CLALLAM ---
replace judge = "MEINER, G" if strpos(judge, "MEINER") > 0 & county == "CLALLAM"
replace judge = "VELIE, G" if strpos(judge, "VELIE") > 0 & county == "CLALLAM"
tab judge if county == "CLALLAM"

* --- CLARK (the two CLARK/CLARKE surname rules without a county are statewide) ---
replace judge = "WOOLARD, DIANE M" if strpos(judge, "WOLLARD") > 0 & county == "CLARK"
replace judge = "STAHNKE, DANIEL" if strpos(judge, "STAHNKE") > 0 & county == "CLARK"
replace judge = "SKIMAS, J" if strpos(judge, "SKIMAS") > 0 & county == "CLARK"
replace judge = "POYFAIR, EDWIN" if strpos(judge, "POYFAIR") > 0 & county == "CLARK"
replace judge = "NICHOLS, DAVID A" if strpos(judge, "NICHOLS, DAVID") > 0 & county == "CLARK"
replace judge = "MORGAN, D" if strpos(judge, "MORGAN,D") > 0 & county == "CLARK"
replace judge = "MORGAN, J" if strpos(judge, "MORGAN J") > 0 & county == "CLARK"
replace judge = "BENNETT, ROGER A" if strpos(judge, "BENNETT") > 0 & county == "CLARK"
replace judge = "CLARK, SUZAN" if strpos(judge, "CLARK, SUZAN") > 0 & county == "CLARK"
replace judge = "CLARK, ELLEN K" if strpos(judge, "CLARK, ELLEN K") > 0
replace judge = "CLARKE, HAROLD D. III" if strpos(judge, "CLARKE, HAROLD D. ") > 0
replace judge = "HARRIS, ROBERT L" if strpos(judge, "HARRIS") > 0 & county == "CLARK"
replace judge = "JOHNSON, BARBARA D" if strpos(judge, "JOHNSON") > 0 & county == "CLARK"
replace judge = "LADLEY, J" if strpos(judge, "LADLEY") > 0 & county == "CLARK"
replace judge = "LODGE, T" if strpos(judge, "LODGE") > 0 & county == "CLARK"
tab judge if county == "CLARK"

* --- COWLITZ ---
replace judge = "COX, M" if strpos(judge, "COX") > 0 & county == "COWLITZ"
replace judge = "HAHN, MARILYN K." if strpos(judge, "HAAN, MARILYN") > 0 & county == "COWLITZ"
replace judge = "HALLOWELL, A." if strpos(judge, "HALLOWELL") > 0 & county == "COWLITZ"
replace judge = "MAHER, DENNIS" if strpos(judge, "MAHER, D") > 0 & county == "COWLITZ"
replace judge = "MCCULLOCH, D" if strpos(judge, "MCCULLOCH") > 0 & county == "COWLITZ"
replace judge = "PRICE, F" if strpos(judge, "PRICE") > 0 & county == "COWLITZ"
replace judge = "RME, JAMES E." if strpos(judge, "RME, J") > 0 & county == "COWLITZ"
replace judge = "RNING, STEPHEN M" if strpos(judge, "RNING, STEPHEN M") > 0 & county == "COWLITZ"

* --- DOUGLAS ---
replace judge = "VAN SICKLE, F" if strpos(judge, "SICKLE") > 0 & county == "DOUGLAS"
replace judge = "CONE, CHARLES" if strpos(judge, "CONE") > 0 & county == "DOUGLAS"

* --- FERRY (the NEILSON/NIELSON rule is statewide) ---
replace judge = "KRISTIANSON, LARRY M" if strpos(judge, "KRISTIANSON") > 0 & county == "FERRY"
replace judge = "NEILSON, ALLEN C" if strpos(judge, "NEILSON") > 0 | strpos(judge, "NIELSON") > 0

* --- FRANKLIN ---
replace judge = "BROWN, CAROLYN" if judge == "BROWN, C" & county == "FRANKLIN"
replace judge = "TABER, D" if strpos(judge, "TABER") > 0 & county == "FRANKLIN"
replace judge = "DAY, R" if inlist(judge, "DAY", "DAY,R") & county == "FRANKLIN"
replace judge = "EKSTROM, ALEXANDER C." if strpos(judge, "EKSTROM") > 0 & county == "FRANKLIN"
replace judge = "MATHESON, CRAIG J." if strpos(judge, "MATHESON") > 0 & county == "FRANKLIN"
replace judge = "STAPLES, FRED" if strpos(judge, "STAPLES") > 0 & county == "FRANKLIN"
replace judge = "YENCOPAL, ALBERT J" if strpos(judge, "YENCOPAL") > 0 & county == "FRANKLIN"
replace judge = "YULE, DENNIS D" if inlist(judge, "YULE, D", "YULE, D D") & county == "FRANKLIN"

* --- GRANT ---
replace judge = "ANTOSZ, JOHN M" if strpos(judge, "ANTOSZ") > 0 & county == "GRANT"
replace judge = "ESTUDILLO, DAVID G." if strpos(judge, "ESTUDILLO") > 0 & county == "GRANT"
replace judge = "KENDALL, J" if strpos(judge, "KENDALL") > 0 & county == "GRANT"
replace judge = "MERRITT, C" if strpos(judge, "MERRITT") > 0 & county == "GRANT"
replace judge = "SPERLINE, EVAN E" if strpos(judge, "SPERLINE") > 0 & county == "GRANT"

* --- GRAYS HARBOR ---
replace judge = "BROWN, STEPHEN E." if judge == "BROWN, S" & county == "GRAYS HARBOR"
replace judge = "CHARETTE, R" if strpos(judge, "CHARETTE") > 0 & county == "GRAYS HARBOR"
replace judge = "EDRDS, DAVID S" if strpos(judge, "EDRDS") > 0 & county == "GRAYS HARBOR"
replace judge = "FOSCUE, DAVID" if strpos(judge, "FOSCUE") > 0 & county == "GRAYS HARBOR"
replace judge = "GODFREY, GORDON" if strpos(judge, "GODFREY") > 0 & county == "GRAYS HARBOR"
replace judge = "KIRKWOOD, J" if strpos(judge, "KIRKWOOD") > 0 & county == "GRAYS HARBOR"
replace judge = "SCHUMACHER, J" if strpos(judge, "SCHUMACHER") > 0 & county == "GRAYS HARBOR"
replace judge = "SPENCER, MICHAEL G." if strpos(judge, "SPENCER") > 0 & county == "GRAYS HARBOR"

* --- ISLAND (the COMSTOCK rule is statewide) ---
replace judge = "COMSTOCK, KENNETH" if strpos(judge, "COMSTOCK") > 0
replace judge = "ANDREW P MCCALLIN" if strpos(judge, "MCCALLIN") > 0 & county == "ISLAND"
replace judge = "GARRATT, JULIA" if strpos(judge, "GARRATT") > 0 & county == "ISLAND"
replace judge = "MILLS, LEILA" if strpos(judge, "MILLS, L") > 0 & county == "ISLAND"
replace judge = "PATRICK, H" if strpos(judge, "PATRICK") > 0 & county == "ISLAND"
replace judge = "PITT, R" if strpos(judge, "PITT") > 0 & county == "ISLAND"
replace judge = "SHINGTON, CHRISTOPHER" if strpos(judge, "SHINGTON") > 0 & county == "ISLAND"
replace judge = "VANDOORNINCK, KITTY-ANN" if strpos(judge, "KITTY‐ANN") > 0 & county == "ISLAND"
replace judge = "WILSON, JOSEPH P." if strpos(judge, "WILSON, J") > 0 & county == "ISLAND"
replace judge = trim(judge)

* --- JEFFERSON, plus two statewide placeholder values that are blanked ---
replace judge = "HORD, WILLIAM E." if strpos(judge, "HORD, W") > 0 & county == "JEFFERSON"
replace judge = "ROPER, J" if strpos(judge, "ROPER") > 0 & county == "JEFFERSON"
replace judge = "" if inlist(judge, "PRESIDING JUDGE", ".")

* Judge clean-up, step 2 (continued): King County and the remaining rules.
* Rules run top to bottom; rules without a county condition apply statewide.

* --- KING (the DOWNING rule is statewide) ---
tab judge if county=="KING"
replace judge="BECKER, MARY K" if strpos(judge,"BECKER, M")&county=="KING"
replace judge="CRAIGHEAD, SUSAN J" if strpos(judge,"CRAIGHEAD")&county=="KING"
replace judge="DOWNING, WILLIAM L" if strpos(judge,"DOWNING")
replace judge="MACK, BARBARA" if strpos(judge,"MACK")&county=="KING"
replace judge="RIETSCHEL, JEAN" if strpos(judge,"RICKSHEL")&county=="KING"
replace judge="ROGERS, JAMES E" if strpos(judge,"ROGERS, JIM")&county=="KING"

* Hyphenated surnames. The noise-stripping step earlier in this file removes
* every "‐" from judge, so by this point these names read "SHEABROWN" and
* "MONTOYALEWIS". The original exact matches on the "‐" spelling therefore
* never fired; both spellings are accepted now.
replace judge = "SHEA-BROWN, JACKIE" if inlist(judge, "SHEA‐BROWN, JACKIE", "SHEABROWN, JACKIE")
replace judge = "MONTOYA-LEWIS, RAQUEL" if inlist(judge, "MONTOYA‐LEWIS, RAQUEL", "MONTOYALEWIS, RAQUEL")
replace judge = "VANDOORNINCK, KITTY-ANN" if strpos(judge,"DOORNINCK")
* NOTE(review): county "NA" is blanked here, after the earlier
* `drop if mi(county)', so these rows stay in the data with an empty county.
* Confirm whether they should be dropped as well.
replace county = "" if county=="NA"

replace judge ="SULLIVAN, MICHAEL" if strpos(judge,"SULLIVAN, M")&county=="PACIFIC"

* Strip trailing programme tags, then repeat the Sullivan rule so that names
* which only match once the tag is gone are also normalised.
replace judge = subinstr(judge, " LRA","",.)
replace judge = subinstr(judge, " FOSA","",.)
replace judge = trim(judge)

replace judge ="SULLIVAN, MICHAEL" if strpos(judge,"SULLIVAN, M")&county=="PACIFIC"

* Remaining county-specific and statewide fixes.
* The Skagit rule writes "RICHERT" and a later statewide rule rewrites that
* to "RICKERT", so the final label is "RICKERT, MICHAEL E".
replace judge = "RICHERT, MICHAEL E" if strpos(judge, "RICKERT")& county=="SKAGIT"
replace judge = "COZZA, SALVATORE" if strpos(judge, "COZZA, S")& county=="SPOKANE"
replace judge="GARRATT, JULIA" if strpos(judge,"GARATT")
replace judge="GIBSON, BLAIN G" if strpos(judge,"GIBSON, BLAINE")&county=="YAKIMA"
replace judge="MCCARTHY, MICHAEL" if strpos(judge,"MCCARTHY, M")&county=="YAKIMA"
replace judge="DOWNES, MICHAEL T" if strpos(judge,"DOWNES, M")
replace judge="EDRDS, DAVID S" if strpos(judge,"EDRDS")
replace judge="HAHN, MARILYN K." if strpos(judge,"HAAN, MARILYN")
replace judge="MILLS, LEILA" if strpos(judge,"MILLS, L")
replace judge="RICKERT, MICHAEL E" if strpos(judge,"RICHERT, MICHAEL E")
* Adds the trailing period to the King County label set above.
replace judge="ROGERS, JAMES E." if strpos(judge,"ROGERS, JAMES E")


***************************************************************
* Charge clean-up, step 1: strip sentence-type labels and other non-charge
* text from the charge field. Fragments are removed strictly in the order
* listed; patterns with significant leading or trailing blanks are written
* out one per line rather than passed through a macro.
foreach junk in "CONFINEMENT OVER ONE YEAR" "CONFINEMENT UNDER ONE YEAR" ///
	"PROBATION" "DOSA (DOSA" "PRISON ‐ ISRB" "COMMUNITY CUSTODY BOARD" ///
	"DOSA PRISON (DOSA 3 & 4)" "DOSA RESIDENTIAL (DOSA 3)" ///
	"DOSA RESIDENTIAL (DOSA" "FIRST TIME OFFENDER" ///
	"SPECIAL SEX OFFENDER(SSOSA)" "3 & 4)" "SUPERVISED APPEAL" {
	replace charge = subinstr(charge, "`junk'", "", .)
}
replace charge = subinstr(charge,"          CC","",.)
replace charge = subinstr(charge,"  CC","",.)
foreach junk in ".." "INSANITY ACQUITTAL" "‐ Pro Tem" "LIFE" "DOSA PRISON (DOSA" {
	replace charge = subinstr(charge, "`junk'", "", .)
}
replace charge = subinstr(charge,"AN              VUCSA PROTECTED ZONE","",.)
replace charge = subinstr(charge,"CC  ","",.)
replace charge = subinstr(charge,"FOSA  ","",.)
replace charge = subinstr(charge,"‐","",.)
replace charge = trim(charge)
replace charge = "" if charge == "CC"

* Collapse runs of blanks: six halving passes, exactly as many as before.
forvalues pass = 1/6 {
	replace charge = subinstr(charge, "  ", " ", .)
}

* Remove a remaining programme tag and two judge names that appear inside
* charge text (matched case-sensitively).
foreach junk in "FOSA" "SZAMBELAN, Michelle D." "CLARKE, Harold D. III" {
	replace charge = subinstr(charge, "`junk'", "", .)
}

* Drop a leading "MISDEMEANOR" (11 characters) and tidy the edges.
replace charge = trim(charge)
replace charge = substr(charge, 12, .) if substr(charge, 1, 11) == "MISDEMEANOR"
replace charge = trim(charge)

* Charge clean-up, step 2: remove a leading and/or trailing token when the
* charge starts or ends with a digit.
* The pass over the digits 0-9 is run twice, as before, so that a second
* numeric token exposed by the first pass is also removed. A charge reduced
* to the bare tag "CC" is blanked between the two passes.
*
* Fix: the trailing token used to be removed with
*     subinstr(charge, word(charge, -1), "", 1)
* which deletes the FIRST occurrence of that token anywhere in the string.
* "ASSAULT 2 WITH WEAPON 2" therefore lost the middle "2" (the degree), not
* the trailing one. Both edges are now cut by position. charge is trimmed
* before every step, so the first word starts at byte 1 and the last word
* ends at the final byte; strlen() and substr() both count bytes.
forval pass = 1/2 {
	forval n = 0/9 {
		* leading token
		replace charge = substr(charge, strlen(word(charge, 1)) + 1, .) if substr(charge,1,1)=="`n'"
		* trailing token
		replace charge = substr(charge, 1, strlen(charge) - strlen(word(charge, -1))) if substr(charge,-1,.)=="`n'"
		replace charge = trim(charge)
	}
	if `pass' == 1 {
		replace charge = "" if charge=="CC"
	}
}
* Charge clean-up, step 3: targeted recodes.
replace charge = "MISD HARASSMENT" if charge == "MISDHARASSMENT"

* Drop a stray "1 " or "2 " sitting directly in front of these keywords.
* Order is keyword-major, digit-minor, as in the original statement list.
foreach kw in "FIREARM" "ATTEMPT" "TORTURE" "CONSPIRACY" "DEADLY WEAPON" {
	foreach d in 1 2 {
		replace charge = subinstr(charge, "`d' `kw'", "`kw'", .)
	}
}
* Here the word "WITH" is removed together with the digit.
foreach d in 1 2 {
	replace charge = subinstr(charge, "`d' WITH DEADLY WEAPON", "DEADLY WEAPON", .)
}
* Same digit-stripping rule for the combined phrase; TORTURE is repeated
* because the original list repeats it at this point.
foreach kw in "FIREARM OR DEADLY WEAPON" "TORTURE" {
	foreach d in 1 2 {
		replace charge = subinstr(charge, "`d' `kw'", "`kw'", .)
	}
}

* Two whole-string fixes.
replace charge = "THEFT ATTEMPT" if charge == "1 THEFT ATTEMPT"
replace charge = "RELEASE TO LRA" if charge == "LRA 1 RELEASE TO LRA"




*drop charge_len
*gen charge_len = strlen(charge)


*generate max observed
*destring TOTALCONFINEMENTDAYS, replace
*bys charge: egen max_sentence = max(TOTALCONFINEMENTDAYS)
*gen sentence_normalized = TOTALCONFINEMENTDAYS/max_sentence

* Save the cleaned charge-level file.
* This used to write to an absolute path under one user's Dropbox folder,
* while the merge further down reads "WA_file_final_updated_nov20.dta" from
* the working directory. Run from anywhere else, the merge silently used a
* stale copy or failed. The file is now written to the working directory,
* like every other file this script reads.
* NOTE(review): if other scripts read this file from the old Dropbox
* location, run this do-file with that folder as the working directory.
save "WA_file_final_updated_nov20.dta", replace



*********************************************************
********************2. Add guidelines********************
*********************************************************
* Load the guideline table, rebuild its string `charge' key from the
* value-labelled raw_charge, and attach it to the cleaned charge-level file.
use "merged_guidelines.dta", clear
drop charge charge_first x
decode raw_charge, generate(charge)

* One guideline row can match many sentenced charges. Guideline rows with no
* match in the sentencing data (_merge == 1) are discarded; unmatched
* sentencing rows are kept.
merge 1:m charge using "WA_file_final_updated_nov20.dta"
keep if _merge != 1

* Fill in the statute for a few charges where it is still missing.
replace Statute_RCW = "9A.36.041" if missing(Statute_RCW) & strpos(charge, "ASSAULT 4") > 0
replace Statute_RCW = "9A.76.175" if missing(Statute_RCW) & strpos(charge, "MISD FALSE STATEMENT TO PUBLIC SERVANT") > 0
replace Statute_RCW = "9.41.190" if missing(Statute_RCW) & strpos(charge, "POSSESSION OF A MACHINE GUN OR SHORTBARRELED S") > 0

* Hard-coded statute, felony class and maximum sentence (in days) for
* robbery and first-degree assault charges.
* Fix: max_possible is now written out in full. The abbreviations `max_poss'
* and `max_' stop with an "ambiguous abbreviation" error as soon as any other
* max_* variable is in memory (for example max_sentence, which this file
* tries to drop near the end), and they fail outright under `set varabbrev off'.
* NOTE(review): the original heading says these fill in missing values, but
* unlike the statute fills that precede this section they are NOT restricted
* to rows where the value is missing. Every charge containing "ROBBERY" is
* set to class A, whatever the merge supplied. Confirm that this is intended.
replace Statute_RCW = "9A.56.200" if strpos(charge,"ROBBERY")
replace Class = "A" if strpos(charge,"ROBBERY")
replace max_possible = 4565.625 if strpos(charge,"ROBBERY")

replace Statute_RCW = "9A.36.011" if strpos(charge,"ASSAULT 1")
replace Class = "A" if strpos(charge,"ASSAULT 1")
replace max_possible = 8492.063  if strpos(charge,"ASSAULT 1")

*replace Stat = "9A.36.031" if strpos(charge,"ASSAULT 3")&!strpos(charge,"PEACE")
*replace Class = "C" if strpos(charge,"ASSAULT 3")
*replace max_poss = 2237.156  strpos(charge,"ASSAULT 3")&!strpos(charge,"PEACE")



* Anticipatory offences: scale the maximum by 0.70
* (author's reference: bottom of page 24, Adult_Sentencing_Manual_2015).
* Applies when attempt == 1, when the charge mentions SOLICITATION or
* CONSPIRACY, or when it mentions ATTEMPT (but not ATTEMPTING) and the
* attempt flag is missing.
* NOTE(review): a row that meets more than one of these conditions is scaled
* more than once (0.70 * 0.70) - confirm that is intended.
replace max_possible = max_possible*0.70 if attempt==1
replace max_possible = max_possible*0.70 if strpos(charge,"SOLICITATION")
replace max_possible = max_possible*0.70 if strpos(charge,"ATTEMPT")&mi(attempt)&!strpos(charge,"ATTEMPTING")
replace max_possible = max_possible*0.70 if strpos(charge,"CONSPIRACY")


* Firearm / deadly-weapon enhancements
* (author's reference: page 204 of Adult_Sentencing_Manual_2015; 6903 observations).

* Diagnostic: tabulate Class for weapon charges whose class is not A, B or C
* (author's note: ~1000 rows are unidentifiable).
gen x = (strpos(charge, "FIREARM") > 0 | strpos(charge, "DEADLY WEAPON") > 0) & inlist(Class, "A", "B", "C")
tab Class if (strpos(charge, "FIREARM") > 0 | strpos(charge, "DEADLY WEAPON") > 0) & x == 0
drop x

* Years added per felony class. A charge that mentions FIREARM gets the
* firearm amount; the deadly-weapon amount applies only to the other rows.
local classes  A B C
local fa_years 5 3 1.5
local dw_years 2 1 0.5
forvalues i = 1/3 {
	local c  : word `i' of `classes'
	local fy : word `i' of `fa_years'
	local dy : word `i' of `dw_years'

	gen x = strpos(charge, "FIREARM") > 0 & Class == "`c'"
	replace max_possible = max_possible + `fy'*365.25 if x == 1
	replace max_possible = max_possible + `dy'*365.25 if strpos(charge, "DEADLY WEAPON") > 0 & Class == "`c'" & x == 0
	* the firearm flag is tabulated for class A only, as before
	if "`c'" == "A" {
		tab x
	}
	drop x
}

* Sexual motivation enhancement
* (author's reference: page 200 of Adult_Sentencing_Manual_2015; 12 observations).
* Adds 2 / 1.5 / 1 years, expressed in days, for class A / B / C.
gen x = strpos(charge,"SEXUAL MOTIVATION") > 0 & inlist(Class, "A", "B", "C")
count if strpos(charge,"SEXUAL MOTIVATION") //~233 rows missing
replace max_possible = max_possible + 2*365.25 if x==1&Class=="A"
replace max_possible = max_possible + 1.5*365.25 if x==1&Class=="B"
replace max_possible = max_possible + 1*365.25 if x==1&Class=="C"
drop x

* Protected zone enhancement (author's reference: page 200; 250 observations):
* the maximum is doubled.
replace max_possible = 2*max_possible if strpos(charge,"PROTECTED ZONE")


* Correctional facility enhancement (author's reference: page 200; 58 observations).
* Fix: max_possible is measured in days and every other enhancement is added
* as years*365.25, but this line added a bare 1.5, i.e. one and a half days.
* It now adds 1.5 years.
replace max_possible = max_possible + 1.5*365.25 if strpos(charge,"CORRECTIONAL FACILITY")

***********************************************************************
**********************3. Aggregate to case level**********************
***********************************************************************

* Convert the four sentence-length fields to numeric and treat a missing
* value as zero.
foreach v of varlist TOTALCONFINEMENTDAYS TOTALMONTHSSUPERVISION ///
	TOTALYEARSSUPERVISION TOTALDAYSSUPERVISION {
	destring `v', replace
	recode `v' (. = 0)
}

* When confinement and all three supervision fields are zero, nothing was
* recorded for the row: set confinement back to missing instead of 0 days.
replace TOTALCONFINEMENTDAYS = . if TOTALCONFINEMENTDAYS == 0 ///
	& TOTALDAYSSUPERVISION == 0 ///
	& TOTALMONTHSSUPERVISION == 0 ///
	& TOTALYEARSSUPERVISION == 0
  
/**create columns**/
* The incoming case_num identifies defendants (author's count: 68,193
* unique). It is renamed def_num, and a new case_num is built as one id per
* defendant-by-sentencing-date combination.
* NOTE(review): egen group() returns missing when edate is missing, so all
* rows with an unparsed date end up in one "case" - confirm none remain.
ren case_num def_num
sort def_num edate
egen case_num = group(def_num edate)

* Number the charges within a case and count them.
* NOTE(review): no within-case sort key is given, so the numbering depends
* on an arbitrary row order and can differ between runs. If `page' and
* `line' record the position in the court file, consider
* `bysort case_num (page line): gen chargenumber = _n'.
bys case_num: gen chargenumber = _n
egen numberofcounts = max(chargenumber), by(case_num)

* Flag the charge(s) with the largest max_possible in each case.
gsort +case_num -max_possible -TOTALCONFINEMENTDAYS 
egen highest_temp = max(max_possible), by(case_num)
gen high_charge = max_possible == highest_temp
drop highest_temp

* A case can have several tied highest charges; count them.
* egen total() replaces the undocumented synonym sum(); results are the same.
egen howmanyhighcharges = total(high_charge), by(case_num)

* Author's note: 298,325 cases have one highest charge out of 356,993 cases,
* so about 60k cases have multiple highest charges.
tab howmanyhighcharges if chargenumber == 1

* Flag the charge(s) with the longest recorded confinement in each case.
* Missing sentences are set to -1 in a scratch copy so they sort first; the
* comparison is then against the last (largest) value within the case.
* (Author's note: sentence days are stored as reals, hence this approach.)
gen sentence_days2 = TOTALCONFINEMENTDAYS
replace sentence_days2 = -1 if mi(sentence_days2) //replace missing with -1
bysort case_num (sentence_days2): gen high_sentence = sentence_days2==sentence_days2[_N]
drop sentence_days2
replace high_sentence = . if mi(TOTALCONFINEMENTDAYS)

tab high_charge high_sentence

* A case can have several tied highest sentences; count them.
egen howmanyhighsentences = total(high_sentence), by(case_num)

* How often each charge string occurs in the whole data set.
bys charge: gen howmanychargesdata = _N
* Flag the most frequent charge within each case (used as a tie-breaker).
egen highest_temp = max(howmanychargesdata), by(case_num)
gen highfreq = highest_temp == howmanychargesdata
drop highest_temp

* A case can have several tied most-frequent charges; count them.
egen howmanyhighfreq = total(highfreq), by(case_num)

* Flag the first-numbered charge in each case.
egen highest_temp = min(chargenumber), by(case_num)
gen minchargenumber = highest_temp == chargenumber
* Fix: the scratch variable was left in the data set here (and so was written
* to the final file), although it is dropped after each earlier use.
drop highest_temp

/** Rules for choosing the top count of each case (diagnostics only) **/
* Every codebook call reports how many distinct cases fall under a rule.
* No rows are dropped in this section: the keep statement at the end is
* commented out. Case counts in the comments are the original author's.

* Rule 1. Cases with a single count need no tie-breaking. (43,864 cases)
codebook case_num if numberofcounts == 1

* Rule 2. Several counts but one unique top count: keep it and drop all
*         lower counts. (15,312 cases)
codebook case_num if numberofcounts > 1 & howmanyhighcharges == 1

* Rule 3. Several counts and several tied top counts: keep the count with
*         the highest sentence. (1,278 cases)
codebook case_num if numberofcounts > 1 & howmanyhighcharges > 1 & howmanyhighsentences == 1

* Rule 4. Tied top counts that also share the same recorded sentence: keep
*         the charge that is most frequent in the data.
*         (6,986 cases; 1,614 resolved by frequency; 5,372 still tied)
codebook case_num if numberofcounts > 1 & howmanyhighcharges > 1 & howmanyhighsentences > 1
codebook case_num if numberofcounts > 1 & howmanyhighcharges > 1 & howmanyhighsentences > 1 & howmanyhighfreq == 1
codebook case_num if numberofcounts > 1 & howmanyhighcharges > 1 & howmanyhighsentences > 1 & howmanyhighfreq > 1

* Rule 5. Still tied on sentence and on frequency: take the first charge
*         listed in the court file.
*         (author's note: 18,574 cases still problematic)
codebook case_num if numberofcounts > 1 & howmanyhighcharges > 1 & howmanyhighsentences > 1 & howmanyhighfreq > 1 & minchargenumber == 1


* keep cases
*keep if numberofcounts==1| numberofcounts>1&howmanyhighcharges==1&high_charge==1| /// 
*	numberofcounts>1&howmanyhighcharge>1&howmanyhighsentences==1&high_sentence==1| ///
*	numberofcounts>1&howmanyhighcharges>1&howmanyhighsentences>1&howmanyhighfreq==1&highfreq==1| ///
*	numberofcounts>1&howmanyhighcharges>1&howmanyhighsentences>1&howmanyhighfreq>1&minchargenumber==1
	
*****************
* Final tidy-up: drop raw text columns, compute the normalised sentence and
* save the analysis file.

drop page line contents cleaned_contents
* Remove leftovers from earlier versions if they are present.
* Fix: `capture drop a b' drops neither variable when either one is absent,
* so each is now dropped on its own.
capture drop sentence_norm
capture drop max_sentence

* Confinement as a share of the maximum possible sentence, plus a copy
* capped at 1.
gen sentence_normalized = TOTALCONFINEMENTDAYS/max_possible
gen sentence_normed_truncated = sentence_normalized
* Fix: Stata treats missing as larger than any number, so the bare condition
* `sentence_normalized>1' also set every missing ratio to 1. Missing ratios
* now stay missing.
replace sentence_normed_truncated = 1 if sentence_normalized > 1 & !missing(sentence_normalized)
encode Statute, gen(Statute_code)
drop date1-date3 _merge judge_len charge2

save recall_data_washington, replace